Explore CMIP6 data on Casper¶

Ensure you have the required libraries installed, these will make it much easier to work with the data

In [ ]:
! pip install netcdf4 xarray[io] cartopy nc-time-axis
In [142]:
import pandas as pd
import xarray as xr
import numpy as np
import os.path

import matplotlib.pyplot as plt
# Useful for plotting maps
import cartopy.crs as ccrs

# This can be useful for working with multiple processors - to be explored later on
# from dask.distributed import Client, LocalCluster

Output data is in /glade/collections/cmip/CMIP6/{activity}/NCC/NorESM2-LM/{experiment}

You can also find other model data here, in particular the NCAR model: Example path: /glade/collections/cmip/CMIP6/DAMIP/NCAR/CESM2/hist-aer/r1i1p1f1/Amon/tas/gn/latest/*.nc

Input data is in: /glade/p/cesmdata/cseg/inputdata/atm/cam/chem/emis/

The model names are not very obvious but you can either google them, ask ChatGPT, or look them up in these structured dictionaries: https://github.com/PCMDI/cmip6-cmor-tables/tree/main/Tables (which can be queried with e.g. Pandas)

In [143]:
def get_MIP(experiment):
    """
    Utility function to get teh activity associated with a particular experiment
    """
    if experiment == 'ssp245-covid':
        return 'DAMIP'
    elif experiment == 'ssp370-lowNTCF':
        return 'AerChemMIP'
    elif experiment.startswith('ssp'):
        return 'ScenarioMIP'
    elif experiment.startswith('hist-'):
        return 'DAMIP'
    else:
        return 'CMIP'
In [144]:
def get_data(variable, experiment, member):
    """
    Read a particular CMIP6 (Amon) variable from NorESM2
    """
    import glob
    files = glob.glob(f"/glade/collections/cmip/CMIP6/{get_MIP(experiment)}/NCC/NorESM2-LM/{experiment}/{member}/Amon/{variable}/gn/v20190815/{variable}/*.nc")
    return xr.open_mfdataset(files)[variable]
In [145]:
tas = get_data('tas', 'historical', 'r1i1p1f1')

Note, the ensemble member format: r for realization, i for initialization, p for physics, and f for forcing

We're only interested in different realizations in this project, so try different r numbers but keep the rest the same: E.g.: r1i1p1f1, r2i1p1f1, r3i1p1f1

In [146]:
# When averaging gridded data on a sphere, we need to account for the fact that the values near the poles have less area
weights = np.cos(np.deg2rad(tas.lat))
weights.name = "weights"

tas_timeseries = tas.weighted(weights).mean(['lat', 'lon'])
In [147]:
tas_timeseries.plot()
Out[147]:
[<matplotlib.lines.Line2D at 0x14afa2bde800>]
No description has been provided for this image
In [9]:
# Plot a map of the average temperature between 1850-1900

tas.sel(time=slice('1850','1900')).mean('time').plot(
    transform=ccrs.PlateCarree(), # This is the projection the data is stored as
    subplot_kws={"projection": ccrs.PlateCarree()}, # This describes the projection to plot onto (which happens to be the projection the data is already in so no transformation is needed in this case)
)

# Feel free to explore other projections here: https://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html

plt.gca().coastlines()
Out[9]:
<cartopy.mpl.feature_artist.FeatureArtist at 0x14afe6b1f160>
No description has been provided for this image

Week 3 Tasks - Oct 2024¶

1. Plot a map of the average global temperature between 2005-2015.¶

In [10]:
# Plot a map of the average temperature between 2005-2015

tas.sel(time=slice('2005','2015')).mean('time').plot(
    transform=ccrs.PlateCarree(), # This is the projection the data is stored as
    subplot_kws={"projection": ccrs.Mollweide()}, # This describes the projection to plot onto
)

# Mollweide projection from: https://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html

plt.gca().coastlines()

plt.title('Average Global Temperature 2005-2015', pad=10)
Out[10]:
Text(0.5, 1.0, 'Average Global Temperature 2005-2015')
No description has been provided for this image

2. Plot a map of the difference in global temperature from 1850-1900 to 2005-2015 with appropriate colorbar and title.¶

In [11]:
# get the difference in global temperature from 1850-1900 to 2005-2015
diff_tas = tas.sel(time=slice('2005','2015')).mean('time') - tas.sel(time=slice('1850','1900')).mean('time')
In [12]:
# Plot a map of the difference in global temperature from 1850-1900 to 2005-2015
diff_tas.plot(
    transform=ccrs.PlateCarree(), # This is the projection the data is stored as
    subplot_kws={"projection": ccrs.Mollweide()}, # This describes the projection to plot onto
)

# Mollweide projection from: https://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html

plt.gca().coastlines()

plt.title('Difference in Global Temperature\nfrom 1850-1900 to 2005-2015', pad=20)
Out[12]:
Text(0.5, 1.0, 'Difference in Global Temperature\nfrom 1850-1900 to 2005-2015')
No description has been provided for this image

3. Plot similar maps for precipitation and daily maximium temperature.¶

In [13]:
# get and store precipitation data
pr = get_data('pr', 'historical', 'r1i1p1f1')
In [14]:
# Plot a map of the average precipitation between 2005-2015
pr.sel(time=slice('2005','2015')).mean('time').plot(
    transform=ccrs.PlateCarree(), # This is the projection the data is stored as
    subplot_kws={"projection": ccrs.Mollweide()}, # This describes the projection to plot onto
)

plt.gca().coastlines()

plt.title('Average Global Precipitation 2005-2015', pad=20)
Out[14]:
Text(0.5, 1.0, 'Average Global Precipitation 2005-2015')
No description has been provided for this image
In [15]:
# get the difference in global temperature from 1850-1900 to 2005-2015
diff_pr = pr.sel(time=slice('2005','2015')).mean('time') - pr.sel(time=slice('1850','1900')).mean('time')
In [16]:
# Plot a map of the difference in global precipitation from 1850-1900 to 2005-2015
diff_pr.plot(
    transform=ccrs.PlateCarree(), # This is the projection the data is stored as
    subplot_kws={"projection": ccrs.Mollweide()}, # This describes the projection to plot onto
)

# Mollweide projection from: https://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html

plt.gca().coastlines()

plt.title('Difference in Global Precipitation\nfrom 1850-1900 to 2005-2015', pad=20)
Out[16]:
Text(0.5, 1.0, 'Difference in Global Precipitation\nfrom 1850-1900 to 2005-2015')
No description has been provided for this image
In [17]:
# write a function for getting tasmax
def get_tasmax(experiment, member):
    import glob
    # files = glob.glob("/glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2-LM/historical/r1i1p1f1/day/tasmax/")
    files = glob.glob(f"/glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2-LM/{experiment}/{member}/day/tasmax/**/*.nc", recursive=True)

    # "/glade/collections/cmip/CMIP6/{get_MIP(experiment)}/NCC/NorESM2-LM/{experiment}/{member}/Amon/{variable}/gn/v20190815/{variable}/*.nc"
    return xr.open_mfdataset(files)['tasmax']
In [18]:
# get and store daily maximum temperature data
tasmax = get_tasmax('historical', 'r1i1p1f1')
In [19]:
# Plot a map of the daily maximum temperature between 2005-2015
tasmax.sel(time=slice('2005','2015')).mean('time').plot(
    transform=ccrs.PlateCarree(), # This is the projection the data is stored as
    subplot_kws={"projection": ccrs.Mollweide()}, # This describes the projection to plot onto
)

plt.gca().coastlines()

plt.title('Average Daily Maximum Temperature 2005-2015', pad=20)
Out[19]:
Text(0.5, 1.0, 'Average Daily Maximum Temperature 2005-2015')
No description has been provided for this image
In [20]:
# get the difference in daily maximum temperature from 1850-1900 to 2005-2015
diff_tasmax = tasmax.sel(time=slice('2005','2015')).mean('time') - tasmax.sel(time=slice('1850','1900')).mean('time')
In [21]:
# Plot a map of the difference in daily maximum temperature from 1850-1900 to 2005-2015
diff_tasmax.plot(
    transform=ccrs.PlateCarree(), # This is the projection the data is stored as
    subplot_kws={"projection": ccrs.Mollweide()}, # This describes the projection to plot onto
)

# Mollweide projection from: https://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html

plt.gca().coastlines()

plt.title('Difference in Average Daily Maximum Temperature\nfrom 1850-1900 to 2005-2015', pad=20)
Out[21]:
Text(0.5, 1.0, 'Difference in Average Daily Maximum Temperature\nfrom 1850-1900 to 2005-2015')
No description has been provided for this image

4. Plot a global average time-series of the variables above. Optionally include multiple ensemble members (or an indication of the spread across ensemble members).¶

In [22]:
# Plot global average temperature time-series

tas_timeseries = tas.weighted(weights).mean(['lat', 'lon'])

tas_timeseries.plot()

plt.title('Average Global Temperature')
Out[22]:
Text(0.5, 1.0, 'Average Global Temperature')
No description has been provided for this image
In [23]:
# Plot global average precipitation time-series

pr_timeseries = pr.weighted(weights).mean(['lat', 'lon'])

pr_timeseries.plot()

plt.title('Average Global Precipitation')
Out[23]:
Text(0.5, 1.0, 'Average Global Precipitation')
No description has been provided for this image
In [24]:
tasmax_month_avg = tasmax.resample(time='ME').mean()
In [25]:
# Plot global average daily maximum temperature time-series

tasmax_timeseries = tasmax_month_avg.weighted(weights).mean(['lat', 'lon'])

tasmax_timeseries.plot()

plt.title('Average Global Daily Maximum Temperature')
Out[25]:
Text(0.5, 1.0, 'Average Global Daily Maximum Temperature')
No description has been provided for this image

5. Pick another variable and produce similar plots.¶

In [26]:
# write a function for getting tasmax
def get_tasmin(experiment, member):
    import glob
    # files = glob.glob("/glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2-LM/historical/r1i1p1f1/day/tasmax/")
    files = glob.glob(f"/glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2-LM/{experiment}/{member}/day/tasmin/**/*.nc", recursive=True)

    # "/glade/collections/cmip/CMIP6/{get_MIP(experiment)}/NCC/NorESM2-LM/{experiment}/{member}/Amon/{variable}/gn/v20190815/{variable}/*.nc"
    return xr.open_mfdataset(files)['tasmin']
In [27]:
tasmin = get_tasmin('historical', 'r1i1p1f1')
In [28]:
# Plot a map of the daily minimum temperature between 2005-2015
tasmin.sel(time=slice('2005','2015')).mean('time').plot(
    transform=ccrs.PlateCarree(), # This is the projection the data is stored as
    subplot_kws={"projection": ccrs.Mollweide()}, # This describes the projection to plot onto
)

plt.gca().coastlines()

plt.title('Average Daily Minimum Temperature 2005-2015', pad=20)
Out[28]:
Text(0.5, 1.0, 'Average Daily Minimum Temperature 2005-2015')
No description has been provided for this image
In [29]:
# get the difference in daily minimum temperature from 1850-1900 to 2005-2015
diff_tasmin = tasmin.sel(time=slice('2005','2015')).mean('time') - tasmin.sel(time=slice('1850','1900')).mean('time')
In [30]:
# Plot a map of the difference in daily maximum temperature from 1850-1900 to 2005-2015
diff_tasmin.plot(
    transform=ccrs.PlateCarree(), # This is the projection the data is stored as
    subplot_kws={"projection": ccrs.Mollweide()}, # This describes the projection to plot onto
)

# Mollweide projection from: https://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html

plt.gca().coastlines()

plt.title('Difference in Average Daily Minimum Temperature\nfrom 1850-1900 to 2005-2015', pad=20)
Out[30]:
Text(0.5, 1.0, 'Difference in Average Daily Minimum Temperature\nfrom 1850-1900 to 2005-2015')
No description has been provided for this image
In [31]:
tasmin_month_avg = tasmin.resample(time='ME').mean()
In [32]:
# Plot global average daily minimum temperature time-series
tasmin_timeseries = tasmin_month_avg.weighted(weights).mean(['lat', 'lon'])

tasmin_timeseries.plot()

plt.title('Average Global Daily Minimum Temperature')
Out[32]:
Text(0.5, 1.0, 'Average Global Daily Minimum Temperature')
No description has been provided for this image

EDA for Project¶

In [148]:
# Function for plotting maps and time series for specified variable

output_folder = './eda'

def average_variable_eda(var, var_name, title):
    var_1850_1900 = var.sel(time=slice('1850', '1900')).mean('time')

    fig, ax = plt.subplots(subplot_kw={"projection": ccrs.PlateCarree()})
    c = var_1850_1900.plot(
        ax=ax,
        transform=ccrs.PlateCarree(),
        cbar_kwargs={'orientation': 'horizontal'}  # Make the color bar horizontal
    )
    plt.title(f"Average {title} Between 1850-1900")
    ax.coastlines()
    plt.subplots_adjust(bottom=0.15)  # Adjust as needed
    output_filename = f"avg_{var_name}_1850_1900.png"
    plt.savefig(f"{output_folder}/{output_filename}")
    plt.show()

    var_2005_2005 = var.sel(time=slice('2005', '2015')).mean('time')

    fig, ax = plt.subplots(subplot_kw={"projection": ccrs.PlateCarree()})
    c = var_2005_2005.plot(
        ax=ax,
        transform=ccrs.PlateCarree(),
        cbar_kwargs={'orientation': 'horizontal'}  # Make the color bar horizontal
    )
    plt.title(f"Average {title} Between 2005-2015")
    ax.coastlines()
    plt.subplots_adjust(bottom=0.15)  # Adjust as needed
    output_filename = f"avg_{var_name}_2005_2015.png"
    plt.savefig(f"{output_folder}/{output_filename}")
    plt.show()

    difference = var.sel(time=slice('2005', '2015')).mean('time') - var.sel(time=slice('1850', '1900')).mean('time')
        #More recent - past

    fig, ax = plt.subplots(subplot_kw={"projection": ccrs.PlateCarree()})
    c = difference.plot(
        ax=ax,
        transform=ccrs.PlateCarree(),
        cbar_kwargs={'orientation': 'horizontal'}  # Make the color bar horizontal
    )
    plt.title(f"Difference in {title} From 1850-1900 to 2005-2015")
    ax.coastlines()
    plt.subplots_adjust(bottom=0.15)  # Adjust as needed
    output_filename = f"diff_avg_{var_name}.png"
    plt.savefig(f"{output_folder}/{output_filename}")
    plt.show()

    weights = np.cos(np.deg2rad(var.lat))
    weights.name = "weights"
    
    var_timeseries = var.weighted(weights).mean(['lat', 'lon'])

    var_timeseries.plot()
    plt.title(f"{title} Timeseries")
    output_filename = f"{var_name}_timeseries.png"
    plt.savefig(f"{output_folder}/{output_filename}")
    plt.show()

    return
In [149]:
experiment = 'historical'
member = 'r2i1p1f1'
variable = 'clt'
import glob
# files = glob.glob("/glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2-LM/historical/r1i1p1f1/day/tasmax/")
# files = glob.glob(f"/glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2-LM/{experiment}/{member}/*/{variable}/**/*.nc", recursive=True)
files = glob.glob(f"/glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2-LM/{experiment}/{member}/day/{variable}/**/**", recursive=True)

# "/glade/collections/cmip/CMIP6/{get_MIP(experiment)}/NCC/NorESM2-LM/{experiment}/{member}/Amon/{variable}/gn/v20190815/{variable}/*.nc"
files
Out[149]:
['/glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2-LM/historical/r2i1p1f1/day/clt/']
In [ ]:
import xarray as xr
import glob
import os

def find_variables_in_path(base_path):
    # Search for all variable subdirectories (e.g., 'tas', 'pr', etc.) in the 'Amon' folder
    variable_dirs = glob.glob(f"{base_path}/Amon/*/")
    
    print("Found variables:")
    for variable_dir in variable_dirs:
        # Extract the variable name from the directory path
        variable_name = os.path.basename(os.path.normpath(variable_dir))
        print(variable_name)

        # Optionally, you can explore the contents of one of the variable files
        nc_files = glob.glob(f"{variable_dir}/gn/latest/*.nc")
        if nc_files:
            # Open the first file and list its variables
            ds = xr.open_dataset(nc_files[0])
            # print(f"  Variables inside {variable_name}:")
            # for var in ds.variables:
            #     print(f"    {var}")
        else:
            print(f"  No .nc files found for {variable_name}")

# Path to where you want to search (adjusting to go up one directory from 'tas')
base_path = "/glade/collections/cmip/CMIP6/DAMIP/NCAR/CESM2/hist-aer/r1i1p1f1"
find_variables_in_path(base_path)
In [150]:
# Function for getting CMIP data
def get_CMIP(variable, experiment, member):
    import glob
    files = glob.glob(f"/glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2-LM/{experiment}/{member}/day/{variable}/**/*.nc", recursive=True)
    return xr.open_mfdataset(files)[variable]
In [187]:
# Function for getting DAMIP data
def get_DAMIP(variable, member):
    import glob
    files = glob.glob(f'/glade/collections/cmip/CMIP6/DAMIP/NCAR/CESM2/*/{member}/*/{variable}/gn/*/*.nc', recursive=True)
    return xr.open_mfdataset(files)[variable]
In [39]:
hfss = get_hfss('hfss', 'historical', 'r2i1p1f1')
In [ ]:
hfss
In [ ]:
# Plot a map of the difference in daily maximum temperature from 1850-1900 to 2005-2015
hfss.plot(
    transform=ccrs.PlateCarree(), # This is the projection the data is stored as
    subplot_kws={"projection": ccrs.Mollweide()}, # This describes the projection to plot onto
)

# Mollweide projection from: https://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html

plt.gca().coastlines()

plt.title('Difference in Surface Sensible Heat Flux\nfrom 1850-1900 to 2005-2015', pad=20)
In [40]:
average_variable_eda(hfss, 'hfss', 'Surface Sensible Heat Flux')
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
clt = get_DAMIP('clt', 'r1i1p1f1')
In [154]:
average_variable_eda(clt, 'clt', 'Total Cloud Fraction')
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
ci = get_DAMIP('ci', 'r1i1p1f1')
In [158]:
average_variable_eda(ci, 'ci', 'Sea Ice Concentration')
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
clivi = get_DAMIP('clivi', 'r1i1p1f1')
In [162]:
average_variable_eda(clivi, 'clivi', 'Column-Integrated Ice Water')
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
cl = get_DAMIP('cl', 'r1i1p1f1')
In [165]:
average_variable_eda(cl.mean(dim='lev'), 'cl', 'Column-Integrated Ice Water')
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
co2mass = get_DAMIP('co2mass', 'r1i1p1f1')
In [ ]:
rldscs = get_DAMIP('rldscs', 'r1i1p1f1')
In [180]:
average_variable_eda(rldscs, 'rldscs', 'Clear-Sky Surface Downwelling Longwave Radiation')
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
clwvi = get_DAMIP('clwvi', 'r1i1p1f1')
In [198]:
average_variable_eda(clwvi, 'clwvi', 'Column-Integrated Liquid Water')
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
rlutcs = get_DAMIP('rlutcs', 'r1i1p1f1')
In [201]:
average_variable_eda(rlutcs, 'rlutcs', 'Clear-Sky Upwelling Longwave Radiation')
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]: